CISC3024 Pattern Recognition Final Project¶

Group Members:¶

  • Huang Yanzhen, DC126732
  • Mai Jiajun, DC127853

0. Project Setup¶

0.1 Packages & Device¶

InĀ [1]:
# Torch
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader, Subset, random_split
from tqdm import tqdm

# Augmentation
import albumentations as A
from albumentations.pytorch import ToTensorV2
import scipy.io as sio

# Visualize Result
from sklearn.metrics import (confusion_matrix, accuracy_score,
                            precision_score, recall_score,
                            f1_score, roc_auc_score,
                            roc_curve, auc, precision_recall_curve,
                            average_precision_score)
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.preprocessing import label_binarize

# Basic
import numpy as np
import cv2
import os
import time
from typing import List, Tuple, Union
import random
import itertools
import copy
E:\Courses\CISC3024-Pattern-Recognition\cisc3024_pr_venv\lib\site-packages\albumentations\__init__.py:13: UserWarning: A new version of Albumentations is available: 1.4.21 (you have 1.4.18). Upgrade using: pip install -U albumentations. To disable automatic update checks, set the environment variable NO_ALBUMENTATIONS_UPDATE to 1.
  check_for_updates()
InĀ [2]:
# Select the compute device: prefer CUDA when a GPU is available, else CPU.
cuda_ok = torch.cuda.is_available()
device_name = "cuda" if cuda_ok else "cpu"
device = torch.device(device_name)
print(f"Using device: {device_name}")
Using device: cuda

0.2 Global Configurations¶

InĀ [3]:
# Root folder holding the SVHN .mat files (train_32x32.mat / test_32x32.mat).
path_dataset = "./data/SVHN_mat"

1. Data Processing and Augmentation¶

1.1 Download Datasets¶

Define dataset class, retrieve dataset.

1.1 Notes¶

InĀ [Ā ]:
# Exploratory cell: load the raw SVHN training split from .mat.
_dat = sio.loadmat(os.path.join(path_dataset, "train_32x32.mat"))
# _dat['X'][0][0][0]
# np.array(_dat).shape
# Raw 'X' layout is (H, W, C, N); move the sample axis first -> (N, 32, 32, 3).
dat = np.transpose(_dat['X'], (3, 0, 1, 2))
# dat = dat.astype(np.float32)
dat.shape
InĀ [Ā ]:
# First Image — expected shape (32, 32, 3): H x W x C
dat[0].shape
InĀ [Ā ]:
# First Row of Image — 32 RGB pixel triplets
dat[0][0]
InĀ [Ā ]:
# First Pixel of Image — a single RGB triplet (uint8)
dat[0][0][0]
InĀ [Ā ]:
# Demo: normalize + tensor-convert one image with albumentations.
# NOTE(review): these mean/std constants look precomputed from the full
# training split — verify against SVHNDataset.get_meanstd().
_transform = A.Compose([
    A.Normalize(mean=[0.4376845359802246, 0.4437684714794159, 0.47280389070510864], std=[0.19803018867969513, 0.2010156661272049, 0.19703581929206848]),
    ToTensorV2()
])

_img = dat[0]
_img = _transform(image=_img)['image']
# print(_img)
_img.shape  # expected torch.Size([3, 32, 32]) after ToTensorV2's HWC->CHW permute

1.2 Dataset¶

InĀ [4]:
class SVHNDataset(Dataset):
    """SVHN digit dataset loaded from a .mat file (Format 2, 32x32 crops).

    Images are held as a (N, 32, 32, 3) uint8 array. The raw label '10'
    denotes digit 0 and is remapped on load.
    """

    def __init__(self, mat_file, transform=None):
        data = sio.loadmat(mat_file)

        # Raw 'X' layout is (H, W, C, N); move the sample axis to the front.
        self.images = np.transpose(data['X'], (3, 0, 1, 2))
        self.labels = data['y'].flatten()
        self.labels[self.labels == 10] = 0
        self.transform = transform        # Allow postponed injection of transform.

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]

        # The transform is mandatory: it converts the image to float and
        # permutes (32, 32, 3) -> Tensor([3, 32, 32]), which DataLoader
        # collation relies on.
        if self.transform is None:        
            raise ValueError("CISC3024 Custom Error: The transform should not be None when this object is passed into a DataLoader.")

        image = self.transform(image=image)['image']
        return image, label
    
    def get_meanstd(self, bias=None):
        """Per-channel mean/std of the images scaled to [0, 1].

        If ``bias`` is given, a single random offset in [0, bias] is added to
        every pixel (wrapping mod 256) before computing the statistics, as in
        the original per-image loop.
        """
        if bias is not None:
            random_bias = random.randint(0, bias)
            # Vectorized replacement for the original per-image Python loop:
            # widen to int16 to avoid uint8 overflow, shift, wrap, narrow back.
            images_ = ((self.images.astype(np.int16) + random_bias) % 256).astype(np.uint8)
        else:
            images_ = self.images
                
        images_ = images_.astype(np.float32) / 255.0
        # Reduce over (N, H, W); keeps one statistic per color channel.
        mean = np.mean(images_, axis=(0, 1, 2))
        std = np.std(images_, axis=(0, 1, 2), ddof=0)
        
        return mean.tolist(), std.tolist()        
    
    def overwrite(self, indices: Union[list, np.ndarray]):
        """
        Create a deep copy of the mother dataset instance and only keep the wanted
        data samples, controlled by indices.
        """
        if any(index < 0 or index >= len(self.labels) for index in indices):
            raise IndexError("CISC3024 Custom Error: One or more indices are out of bounds.")
        
        # deepcopy keeps ancillary attributes (e.g. transform) intact; the
        # image/label arrays are then replaced with the selected subset.
        new_dataset = copy.deepcopy(self)
        new_dataset.images = self.images[indices]
        new_dataset.labels = self.labels[indices]
        return new_dataset

1.3 Peek at Data¶

InĀ [208]:
def peek(dataset, index=None):
    """Show one dataset sample in a 1x6 grid of subplots.

    NOTE(review): ``peek_index`` is chosen once, before the loop, so all six
    panels render the SAME sample; they only differ if ``dataset.transform``
    is stochastic (random augmentation) — presumably the intent, confirm.
    """
    def unnormalize(img, mean, std):
        """Revert the normalization for visualization."""
        img = img * std + mean
        return np.clip(img, 0, 1)

    # Channel statistics used to undo A.Normalize before display.
    mean, std = dataset.get_meanstd()

    # Plotting multiple images in a grid
    grid_rows, grid_cols = 1, 6
    
    fig, axes = plt.subplots(grid_rows, grid_cols, figsize=(6, 6))

    # Pick a random sample unless the caller pinned one via ``index``.
    peek_index = random.randint(0, dataset.__len__()-1) if index is None else index
    
    for i in range(grid_cols):
        img_tensor, label = dataset.__getitem__(peek_index)
        img = img_tensor.permute(1, 2, 0).numpy()  # Convert to (H, W, C)
        img = unnormalize(img, mean, std)
    
        ax = axes[i]  # Get subplot axis
        ax.imshow(img)
        ax.set_title(f"Label: {label}")
    
    plt.tight_layout()
    plt.show()
    print(f"Peeking data from training set of index {peek_index}.\nImage Tnesor Size:{dataset.__getitem__(peek_index)[0].shape}")

2. Neural Network¶

2.1 Model Structure¶

InĀ [7]:
class SmallVGG(nn.Module):
    """Compact VGG-style CNN for 10-class SVHN classification.

    Three conv stages, each ending in a 2x2 max-pool that halves the spatial
    size (frame_size -> frame_size/8 overall), followed by a two-layer
    classifier head. Input: (N, 3, frame_size, frame_size); output: (N, 10)
    raw logits (no softmax — pair with nn.CrossEntropyLoss).
    """

    def __init__(self, frame_size=32):
        super(SmallVGG, self).__init__()
        self.frame_size = frame_size
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(8, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), # 16x16

            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), # 8x8

            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), # 4x4
        )
    
        # The last conv stage emits 32 channels at (frame_size/8)^2 spatial
        # positions, so the flattened feature length is 32*(frame_size//8)**2.
        # BUG FIX: the original hard-coded `frame_size * 4 * 4`, which only
        # worked because frame_size == 32 happens to equal the channel count;
        # this form gives the identical 512 for the default and stays correct
        # for other frame sizes divisible by 8.
        self.fc_layers = nn.Sequential(
            nn.Linear(32 * (frame_size // 8) ** 2, 256),
            nn.ReLU(),
            nn.Linear(256, 10)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)  # Flatten (N, C, H, W) -> (N, C*H*W)
        x = self.fc_layers(x)
        return x

2.2 Train and Evaluate Function¶

InĀ [165]:
def train_and_evaluate(model,
                      train_loader,
                      valid_loader,
                      criterion,
                      optimizer,
                      num_epochs=100,
                      stop_early_params=None):
    """Train ``model`` and record per-epoch average train/validation losses.

    Args:
        model: the network to train (moved to the global ``device`` batches).
        train_loader / valid_loader: DataLoaders yielding (images, labels).
        criterion: loss function (e.g. nn.CrossEntropyLoss).
        optimizer: optimizer bound to ``model``'s parameters.
        num_epochs: maximum number of epochs.
        stop_early_params: optional dict {"min_delta": float, "patience": int};
            when given, training stops after ``patience`` non-improving epochs
            and the best weights are restored into ``model``.

    Returns:
        (train_losses, valid_losses): per-epoch average loss per sample.
    """
    # Record Losses to plot
    train_losses = []
    valid_losses = []

    # Early stop state: best weights seen so far and an overfit counter.
    best_state_dict = None
    current_min_valid_loss = np.inf
    num_overfit_epochs = 0

    for epoch in range(num_epochs):
        # Train
        model.train()
        running_loss = 0.0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * len(images)
        # BUG FIX: running_loss sums per-SAMPLE losses (loss * batch size),
        # so normalize by the dataset size, not by the number of batches.
        train_losses.append(running_loss / len(train_loader.dataset))

        # Evaluate
        model.eval()
        valid_loss = 0.0
        with torch.no_grad():
            for images, labels in valid_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                valid_loss += loss.item() * len(images)

        # Same per-sample normalization as the training loss.
        valid_losses.append(valid_loss / len(valid_loader.dataset))
        print(f"Epoch[{epoch+1}/{num_epochs}], Train Loss:{train_losses[-1]:.4f}, Validation Loss:{valid_losses[-1]:.4f}")

        # Early Stop?
        if stop_early_params is None:
            continue
    
        if current_min_valid_loss - stop_early_params["min_delta"] > valid_losses[-1]: # Validation loss decreases
            current_min_valid_loss = valid_losses[-1]
            # Snapshot weights only (cheaper than deep-copying the module).
            best_state_dict = copy.deepcopy(model.state_dict())
            num_overfit_epochs = (num_overfit_epochs - 1) if num_overfit_epochs > 0 else 0
        else: # Validation loss increases
            num_overfit_epochs += 1

        if num_overfit_epochs > stop_early_params["patience"]:
            print(f"Early stopping at epoch {epoch+1}.")
            # BUG FIX: the original rebound the local name `model`, which never
            # reached the caller; load the best weights into the caller's model.
            model.load_state_dict(best_state_dict)
            break

    return train_losses, valid_losses

2.3 Get Predictions¶

Multiple functions are defined to evaluate data. Below is a list of them.

InĀ [9]:
def get_predictions(model_path, extra_loader):
    """Evaluate a saved SmallVGG checkpoint over ``extra_loader``.

    ``model_path`` may be either a filesystem path to a saved state dict or an
    already-loaded state dict. Returns (pred_scores, true_labels, pred_labels)
    where pred_scores are the per-class softmax probabilities.
    """
    state = torch.load(model_path) if isinstance(model_path, str) else model_path

    net = SmallVGG()
    net.load_state_dict(state)
    net.to(device)
    net.eval()

    pred_scores = []  # Prob. of predictions
    true_labels = []  # Ground Truth
    pred_labels = []  # Label of prediction, i.e., argmax(softmax(pred_scores))

    with torch.no_grad():
        for batch_images, batch_labels in tqdm(extra_loader):
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = net(batch_images)
            probabilities = nn.functional.softmax(logits, dim=-1)

            pred_scores.extend(probabilities.cpu().tolist())
            pred_labels.extend(logits.argmax(dim=1).tolist())
            true_labels.extend(batch_labels.cpu().tolist())

    return pred_scores, true_labels, pred_labels

2.4 Get Metrics¶

InĀ [10]:
def get_metrics(true_labels, pred_labels):
    """Accuracy plus per-class (digits 0-9) precision, recall and F1 arrays."""
    digits = range(0, 10)
    accuracy = accuracy_score(true_labels, pred_labels)
    precision = precision_score(true_labels, pred_labels, zero_division=1, average=None, labels=digits)
    recall = recall_score(true_labels, pred_labels, zero_division=1, average=None, labels=digits)
    # NOTE(review): zero_division here is 0, unlike precision/recall above —
    # presumably intentional so an empty class drags F1 down; confirm.
    f1 = f1_score(true_labels, pred_labels, zero_division=0, average=None, labels=digits)
    return accuracy, precision, recall, f1
InĀ [11]:
def print_metrics(accuracies, f1s):
    """Pretty-print run accuracies and per-class F1 lists, then report the run
    with the highest mean F1 (1-indexed)."""
    print("Accuracies:")
    for accuracy in accuracies:
        print(f"{accuracy:.3f}", end=" ")
    print("\n")

    print("F1 Score Lists:")
    run_mean_f1s = []
    for per_class_f1 in f1s:
        for score in per_class_f1:
            print(f"{score:.3f}", end=" ")
        run_mean = np.mean(per_class_f1)
        run_mean_f1s.append(run_mean)
        print(f"| Avg F1={run_mean:.3f}, Std F1={np.std(per_class_f1)}")
    print(f"Best: {np.argmax(run_mean_f1s)+1}-th")
InĀ [12]:
# Compute ROC AUC for each class
def get_roc_auc(true_labels_bin, pred_labels_bin):
    """One-vs-rest ROC AUC per class (0-9).

    ``pred_labels_bin`` must hold the per-class prediction scores (N x 10).
    BUG FIX: the original read a global ``pred_scores`` instead of this
    parameter, raising NameError (or silently using stale data) whenever that
    global was absent.
    """
    scores = np.array(pred_labels_bin)
    roc_auc = dict()
    for i in range(0, 10):
        roc_auc[i] = roc_auc_score(true_labels_bin[:, i], scores[:, i])
    return roc_auc

3. Experiments¶

3.0 Preparation¶

3.0.1 Plot Functions¶

The experiments will be a list of the following structures:

{
    "HYPER_PARAM_1": combo[0],
    "HYPER_PARAM_2": combo[1],
    "train_losses": train_losses,
    "valid_losses": valid_losses,
    "model_state_dict": exp_model.state_dict()
}

Epoch-Loss Curves¶

InĀ [13]:
def plot_el(loaded_experiments, hyper_param_names, n_rows=4, n_cols=4):
    """Draw one train/validation epoch-loss subplot per experiment."""
    name_a, name_b = hyper_param_names
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(n_cols * 5, n_rows * 5))

    for idx, ax in enumerate(axes.flat):
        record = loaded_experiments[idx]
        tr_hist = record["train_losses"]
        va_hist = record["valid_losses"]

        ax.plot(tr_hist, label=f"TRL, min={np.min(tr_hist):.3f}")
        ax.plot(va_hist, label=f"VAL, min={np.min(va_hist):.3f} at step={np.argmin(va_hist)}")
        ax.set_xlabel("Epochs")
        ax.set_ylabel("Loss")
        ax.set_title(f"{name_a}={record[name_a]}, {name_b}={record[name_b]}")
        ax.legend(loc="upper right")

    plt.show()

Get Experiment Results¶

InĀ [14]:
def get_experiment_results(loaded_experiments, test_hyperparam_names, extra_loader):
    """Run each saved experiment model over ``extra_loader`` and collect its
    predictions, tagged with the two hyper-parameter values of interest."""
    name_a, name_b = test_hyperparam_names
    experiment_results = []

    for exp in loaded_experiments:
        pred_scores, true_labels, pred_labels = get_predictions(exp['model_state_dict'], extra_loader)
        experiment_results.append({
            name_a: exp[name_a],
            name_b: exp[name_b],
            "true_labels": true_labels,
            "pred_labels": pred_labels,
            "pred_scores": pred_scores
        })

        # Quick sanity peek at the collected outputs.
        print("First 10 true labels:")
        for value in true_labels[:10]:
            print(value, end=" ")
        print("...\n")

        print("First 10 pred labels:")
        for value in pred_labels[:10]:
            print(value, end=" ")
        print("...\n")

        print("First 5 pred_scores:")
        for value in pred_scores[:5]:
            print(value, end=" ")
        print("...\n")

        torch.cuda.empty_cache()

    return experiment_results

Confusion Matrix¶

InĀ [15]:
def plot_cm(experiment_results, hyper_param_names, n_rows=4, n_cols=4):
    """Draw one test-set confusion matrix per experiment."""
    name_a, name_b = hyper_param_names
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 5, n_rows * 5))
    axes = axes.flatten()

    for idx, record in enumerate(experiment_results):
        matrix = confusion_matrix(record['true_labels'], record['pred_labels'])
        display = ConfusionMatrixDisplay(confusion_matrix=matrix, display_labels=range(0,10))
        display.plot(ax=axes[idx], cmap=plt.cm.Blues)
        axes[idx].set_title(f"Exp {idx+1}: {name_a}={record[name_a]}, {name_b}={record[name_b]}")

    plt.tight_layout()
    plt.show()

Precision-Recall Curve¶

InĀ [16]:
def plot_pr(experiment_results, hyper_param_names, n_rows=4, n_cols=4):
    """Plot one-vs-rest precision-recall curves (classes 0-9) per experiment.

    Returns:
        (accuracies, f1_scores): one accuracy float and one per-class F1 array
        per experiment, as computed by get_metrics.
    """
    fig, axes = plt.subplots(n_rows,n_cols, figsize=(n_cols * 5, n_rows * 5))
    axes = axes.flatten()

    hparam_1, hparam_2 = hyper_param_names
    accuracies = []
    f1_scores = []
    
    for i, exp_rs in enumerate(experiment_results):
        true_labels, pred_labels, pred_scores = exp_rs['true_labels'], exp_rs['pred_labels'], exp_rs['pred_scores']
        # One-hot (one-vs-rest) encodings needed for the per-class curves.
        true_labels_bin, pred_labels_bin = label_binarize(true_labels, classes=range(0,10)), label_binarize(pred_labels, classes=range(0,10))
        
        accuracy, precision, recall, f1 = get_metrics(true_labels, pred_labels)
        accuracies.append(accuracy)
        f1_scores.append(f1)
        
        for j in range(0, 10):
            # print(f"Class {j}: Prec:{precision[j]:.2f}, Recall:{recall[j]:.2f}, F_1 Score:{f1[j]:.2f}")
            # PR curve for class j against its softmax score column.
            precision_i, recall_i, _ = precision_recall_curve(true_labels_bin[:, j], np.array(pred_scores)[:, j])
    
            average_precision = average_precision_score(true_labels_bin[:, j], np.array(pred_scores)[:, j])
            axes[i].step(recall_i, precision_i, where="post", label=f"Class {j} AP={average_precision:.2f}")
            axes[i].set_title(f"PR-Curve {hparam_1}={exp_rs[hparam_1]}, {hparam_2}={exp_rs[hparam_2]}")
        axes[i].legend()
        axes[i].set_xlabel("Recall")
        axes[i].set_ylabel("Precision")
    
    # for j in range(i+1, 16):
    #     fig.delaxes(axes[j])
    
    plt.tight_layout()
    plt.show()
    return accuracies, f1_scores

ROC-AUC Curve¶

InĀ [17]:
def plot_rocauc(experiment_results, hyper_param_names, curve_type, n_rows=4, n_cols=4):
    """Plot ROC curves per experiment.

    curve_type: "all" plots the ten per-class one-vs-rest curves;
    "macro_micro" plots only the macro- and micro-averaged curves.
    """
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 5, n_rows * 5))
    axes = axes.flatten()

    hparam_1, hparam_2 = hyper_param_names

    for i, exp_rs in enumerate(experiment_results):
        true_labels, pred_scores = exp_rs['true_labels'], exp_rs['pred_scores']
        true_labels_bin = label_binarize(true_labels, classes=range(0, 10))

        # All Classes' ROC curve & ROC Area Under Curve
        fpr = dict()
        tpr = dict()
        roc_auc = dict()

        for j in range(10):
            fpr[j], tpr[j], _ = roc_curve(true_labels_bin[:, j], np.array(pred_scores)[:, j])
            roc_auc[j] = auc(fpr[j], tpr[j])

        # Macro-Average ROC & ROC-AUC: average the ten per-class TPRs on a
        # shared FPR grid (union of all class thresholds).
        all_fpr = np.unique(np.concatenate([fpr[j] for j in range(10)]))
        mean_tpr = np.zeros_like(all_fpr)
        for j in range(10):
            mean_tpr += np.interp(all_fpr, fpr[j], tpr[j])
        mean_tpr /= 10

        fpr["macro"] = all_fpr
        tpr["macro"] = mean_tpr
        roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

        # Compute micro-average ROC curve and ROC area (pool all class
        # decisions into one binary problem by flattening).
        fpr["micro"], tpr["micro"], _ = roc_curve(true_labels_bin.ravel(), np.array(pred_scores).ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

        # Plot only Macro or Micro ROC curves
        if curve_type == "macro_micro":
            axes[i].plot(fpr["macro"], tpr["macro"], label=f"Macro (AUC={roc_auc['macro']:.2f})")
            axes[i].plot(fpr["micro"], tpr["micro"], label=f"Micro (AUC={roc_auc['micro']:.2f})")
        elif curve_type == "all":
            # Plot all ROC curves
            for j in range(10):
                axes[i].plot(fpr[j], tpr[j], label=f"Class {j} (AUC={roc_auc[j]:.2f})")

        # Diagonal = chance-level classifier reference.
        axes[i].plot([0, 1], [0, 1], "k--")
        axes[i].set_xlabel("False Positive Rate")
        axes[i].set_ylabel("True Positive Rate")
        axes[i].set_title(f"ROC Curve {i+1}, {hparam_1}={exp_rs[hparam_1]}, {hparam_2}={exp_rs[hparam_2]}")
        axes[i].legend(loc='lower right')

    plt.tight_layout()
    plt.show()

3.0.2 Datasets¶

InĀ [18]:
def split_train_valid(train_dataset, train_ratio):
    """Randomly split ``train_dataset`` into two standalone datasets.

    random_split only yields Subset views, which lack SVHNDataset's methods;
    each half is therefore rebuilt via dataset.overwrite() on the subset's
    indices. Returns (train_dataset, valid_dataset).
    """
    total = len(train_dataset)
    n_train = int(train_ratio * total)
    train_part, valid_part = random_split(train_dataset, [n_train, total - n_train])

    return (train_dataset.overwrite(indices=train_part.indices),
            train_dataset.overwrite(indices=valid_part.indices))

3.1 Experiment 1: Optimizer¶

In the standard process of gradient descent, each update is proportional to the negative gradient (first-order derivative) of the loss function with respect to the parameter. In this traditional process, the learning rate is fixed, and it may cause problems.

  • Oscillations. If locally, the learning rate is too high, the model will jump around the local minimum.
  • Slow convergence. If locally, the learning rate is too low, the model will spend a lot of epochs to converge to a local minimum.

To solve this problem, we enable the learning rate to be adaptive by introducing the "momentum", a velocity-like term which accumulates past gradients in the direction of consistent descent.

  • The velocity term is the weighted sum of previous gradients.
  • ...such that the update direction does not only rely on the current gradient, but also on previous ones.

The update of velocity is represented as: $$ v_t=\beta v_{t-1} + (1-\beta)\cdot\nabla J(\theta) $$ where $\beta$ is the momentum coefficient. In our experiments, $\beta$ will be fixed to $0.9$.

The update of parameters will be: $$ \theta_{t} = \theta_{t-1}-\eta\cdot v_{t} $$ In this experiment, we focus on the performance of different optimizers, each of which has its own optimized way to update the momentum. We will fix other variables, including transform, epoch number and learning rate, and only adjust the optimizers. There are a few optimizers to be chosen:

  • Adaptive Moment Estimation (Adam)
  • Stochastic Gradient Descent (SGD)
  • Root Mean Square Propagation (RMSprop)
  • Adam with Weight Decay (AdamW)
  • Adaptive Gradient Algorithm (Adagrad)
  • SGD with Momentum and Nesterov Accelerated Gradient
InĀ [38]:
# Universal Train Dataset without splitting
exp1_universal_train_dataset = SVHNDataset(mat_file=os.path.join(path_dataset,"train_32x32.mat"))

# Train-Validation Split
exp1_train_dataset, exp1_valid_dataset = split_train_valid(exp1_universal_train_dataset, train_ratio=0.8)

# overwrite() must return real SVHNDataset objects, not Subset views.
if not isinstance(exp1_train_dataset, SVHNDataset) or not isinstance(exp1_valid_dataset, SVHNDataset):
    raise TypeError("CISC3024 Custom Error: The dataset should be an instance of SVHNDataset.")

# Normalize: channel stats are computed on the TRAIN split only, then reused
# for validation and test.
exp1_mean, exp1_std = exp1_train_dataset.get_meanstd()
exp1_hyperparams = {
    "num_epochs": 25,
    "lr": 1e-3,
    "criterion": nn.CrossEntropyLoss(),
    "transform": A.Compose([
        A.Normalize(mean=exp1_mean, std=exp1_std),
        ToTensorV2()
    ])
}

# Inject Transform (postponed so get_meanstd() above ran on the raw uint8 images)
exp1_train_dataset.transform = exp1_hyperparams['transform']
exp1_valid_dataset.transform = exp1_hyperparams['transform']

# Test Dataset
exp1_test_dataset = SVHNDataset(mat_file=os.path.join(path_dataset, "test_32x32.mat"), transform=exp1_hyperparams['transform']) 

# The unsplit dataset is no longer needed; free its arrays.
del exp1_universal_train_dataset

print(f"Training Size:{exp1_train_dataset.__len__()}, Validation Size:{exp1_valid_dataset.__len__()}")
print(f"Channel Means: {exp1_mean}\nChannel Stds: {exp1_std}")
Training Size:58605, Validation Size:14652
Channel Means: [0.43772128224372864, 0.44378969073295593, 0.4728474020957947]
Channel Stds: [0.19793079793453217, 0.20086902379989624, 0.1968136429786682]

Define changing & non-changing hyper parameters.

InĀ [32]:
# One fresh model per candidate optimizer so the six runs share no weights.
exp1_models = [SmallVGG().to(device) for _ in range(0,6)]

# Each optimizer is bound to its own model's parameters; lr is shared.
candidate_optimizers = [
    optim.Adam(exp1_models[0].parameters(), lr=exp1_hyperparams['lr']), 
    optim.SGD(exp1_models[1].parameters(), lr=exp1_hyperparams['lr'], momentum=0.9),
    optim.RMSprop(exp1_models[2].parameters(), lr=exp1_hyperparams['lr']),
    optim.AdamW(exp1_models[3].parameters(), lr=exp1_hyperparams['lr'], weight_decay=0.01),
    optim.Adagrad(exp1_models[4].parameters(), lr=exp1_hyperparams['lr']),
    optim.SGD(exp1_models[5].parameters(), lr=exp1_hyperparams['lr'], momentum=0.9, nesterov=True)]

# Sanity check: distinct ids confirm the models are independent objects.
for model in exp1_models:
    print(id(model), end=", ")
3044296310112, 3043927082032, 3043927083520, 3043927084144, 3043927085008, 3044137279152, 

Train, Validation and Test datasets.

Train, Validation and Test Data Loaders.

InĀ [40]:
# Data Loaders: shuffle train/valid each epoch; keep test order fixed so
# evaluation results line up across runs.
exp1_train_loader = DataLoader(exp1_train_dataset, batch_size=128, shuffle=True)
exp1_valid_loader = DataLoader(exp1_valid_dataset, batch_size=128, shuffle=True)
exp1_test_loader = DataLoader(exp1_test_dataset, batch_size=128, shuffle=False)

Run Experiments

InĀ [35]:
def run_exp1(optimizers, models, hyper_params, train_loader, valid_loader):
    """Train each (model, optimizer) pair with shared hyper-parameters and
    return a list of experiment records (losses + final state dict)."""
    experiments = []

    for run_idx, (chosen_optimizer, net) in enumerate(zip(optimizers, models)):
        print(f"Experiment {run_idx+1}. Running experiment on optimizer: {chosen_optimizer.__class__.__name__}")

        train_losses, valid_losses = train_and_evaluate(
            net,
            train_loader,
            valid_loader,
            hyper_params['criterion'],
            chosen_optimizer,
            hyper_params['num_epochs'],
        )

        experiments.append({
            "optimizer": chosen_optimizer.__class__.__name__,
            "others": "same",
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": net.state_dict()
        })

        # Release cached GPU memory between runs.
        torch.cuda.empty_cache()

    return experiments
InĀ [Ā ]:
# Run all six optimizer experiments and checkpoint the results, stamped with
# the current unix time so reruns don't overwrite earlier files.
exp1 = run_exp1(candidate_optimizers, exp1_models, exp1_hyperparams, exp1_train_loader, exp1_valid_loader)
time_str = str(time.time()).replace(".","")
torch.save(exp1, f"./models/exp1_{time_str}.pth")

Load Experiments

Load Experiment objects and plot results.

InĀ [Ā ]:
# Reload a saved run and evaluate every model on the held-out test set.
exp1_loaded = torch.load("./models/exp1_17305518422052872.pth")
exp1_results = get_experiment_results(exp1_loaded, test_hyperparam_names=["optimizer", "others"], extra_loader=exp1_test_loader)
InĀ [42]:
# Epoch-loss curves for the six optimizers (1 row x 6 cols).
plot_el(exp1_loaded, ["optimizer", "others"], n_rows=1, n_cols=6)
No description has been provided for this image
InĀ [43]:
# Test-set confusion matrices, one per optimizer.
plot_cm(exp1_results, ["optimizer", "others"], n_rows=1, n_cols=6)
No description has been provided for this image

Precision-Recall Curve

Overfitted to inspect training performance. This "Best" does not tell the optimum optimizer.

InĀ [44]:
# PR curves plus accuracy / per-class F1 summary for each optimizer.
exp1_accuracies, exp1_f1s = plot_pr(exp1_results, ["optimizer", "others"], n_rows=1, n_cols=6)
print_metrics(exp1_accuracies, exp1_f1s)
No description has been provided for this image
Accuracies:
0.907 0.196 0.906 0.901 0.735 0.196 

F1 Score Lists:
0.912 0.935 0.938 0.874 0.921 0.905 0.881 0.906 0.847 0.867 | Avg F1=0.899, Std F1=0.028664446684648247
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
0.888 0.933 0.939 0.876 0.913 0.903 0.878 0.923 0.850 0.865 | Avg F1=0.897, Std F1=0.028470034992916605
0.887 0.935 0.936 0.871 0.914 0.907 0.871 0.908 0.830 0.843 | Avg F1=0.890, Std F1=0.03434799781092271
0.723 0.840 0.802 0.638 0.776 0.700 0.647 0.789 0.515 0.628 | Avg F1=0.706, Std F1=0.09492988307421246
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
Best: 1-th
InĀ [45]:
# Per-class (one-vs-rest) ROC curves for each optimizer.
plot_rocauc(exp1_results, ["optimizer", "others"], curve_type="all", n_rows=1, n_cols=6)
No description has been provided for this image
InĀ [46]:
# Macro/micro-averaged ROC curves for each optimizer.
plot_rocauc(exp1_results, ["optimizer", "others"], curve_type="macro_micro", n_rows=1, n_cols=6)
No description has been provided for this image

3.2 Experiment 2: Epoch Number and Learning Rate¶

This experiment seeks to find the effect of different combinations of epoch numbers and learning rates on the training & testing performance of the neural network.

3.2.1 Experiment 2-1: Rough Search¶

In this sub-experiment, we perform a rough search on the epochs and learning rate. We promoted four possible values for both parameters: $$ \text{candidate epochs}=\{10, 15, 20, 25\} $$ $$ \text{candidate lr}=\{1.0\times 10^{-3},1.0\times 10^{-4},1.0\times 10^{-5},1.0\times 10^{-6}\} $$

InĀ [59]:
# Universal Train Dataset without splitting
exp2_universal_train_dataset = SVHNDataset(mat_file=os.path.join(path_dataset,"train_32x32.mat"))

# Train & Validation Datasets
exp2_train_dataset, exp2_valid_dataset = split_train_valid(exp2_universal_train_dataset, train_ratio=0.8)
del exp2_universal_train_dataset # Unload the mill and kill the donkey

# Normalize: stats come from the train split only (computed before the
# transform is injected, i.e. on raw uint8 images).
exp2_mean, exp2_std = exp2_train_dataset.get_meanstd()

# Fixed pieces for experiment 2; the optimizer CLASS (not instance) is stored
# so each run can bind it to a fresh model with its own lr.
exp2_hyperparams = {
    "criterion": nn.CrossEntropyLoss(),
    "transform": A.Compose([
        A.Normalize(mean=exp2_mean, std=exp2_std),
        ToTensorV2()
    ]),
    "optimizer":optim.Adam,
}

exp2_train_dataset.transform = exp2_hyperparams['transform']
exp2_valid_dataset.transform = exp2_hyperparams['transform']

# Test Dataset
exp2_test_dataset = SVHNDataset(mat_file=os.path.join(path_dataset,"test_32x32.mat"), transform=exp2_hyperparams['transform'])

print(f"Training Size:{exp2_train_dataset.__len__()}, Validation Size:{exp2_valid_dataset.__len__()}")
print(f"Channel Means: {exp2_mean}\nChannel Stds: {exp2_std}")
Training Size:58605, Validation Size:14652
Channel Means: [0.4374935030937195, 0.44353100657463074, 0.4726291298866272]
Channel Stds: [0.1980431079864502, 0.2009742110967636, 0.19704842567443848]
InĀ [60]:
# Rough grid: 4 epoch budgets x 4 learning rates = 16 combinations.
candidate_epochs = [10, 15, 20, 25]
candidate_lr = [1e-3, 1e-4, 1e-5, 1e-6]
InĀ [61]:
# Loaders for experiment 2 (same batch size and shuffling policy as exp 1).
exp2_train_loader = DataLoader(exp2_train_dataset, batch_size=128, shuffle=True)
exp2_valid_loader = DataLoader(exp2_valid_dataset, batch_size=128, shuffle=True)
exp2_test_loader = DataLoader(exp2_test_dataset, batch_size=128, shuffle=False)
InĀ [62]:
def run_exp2_1(epochs, lr_list, hyper_params, train_loader, valid_loader):
    """Grid-search every (epoch budget, learning rate) combination with a
    fresh SmallVGG per run; returns one record per combination."""
    experiments = []

    for run_idx, (num_epochs, lr) in enumerate(itertools.product(epochs, lr_list)):
        print(f"Running Exp {run_idx+1}: num_epoch={num_epochs}, lr={lr}")

        net = SmallVGG().to(device)
        chosen_optimizer = hyper_params['optimizer'](net.parameters(), lr=lr)
        train_losses, valid_losses = train_and_evaluate(
            net, train_loader, valid_loader,
            hyper_params['criterion'], chosen_optimizer, num_epochs)

        experiments.append({
            "num_epochs": num_epochs,
            "lr": lr,
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": net.state_dict()
        })

    return experiments
InĀ [Ā ]:
# Run the 16-combination rough search and checkpoint the results.
exp2_1 = run_exp2_1(candidate_epochs, candidate_lr, exp2_hyperparams, exp2_train_loader, exp2_valid_loader)
time_str = str(time.time()).replace(".","")
torch.save(exp2_1, f"./models/exp2-1_{time_str}.pth")
InĀ [Ā ]:
# Reload the rough-search run and evaluate every model on the test set.
exp2_1_loaded = torch.load("./models/exp2-1_17305539358378615.pth")
exp2_1_results = get_experiment_results(exp2_1_loaded, test_hyperparam_names=["num_epochs", "lr"], extra_loader=exp2_test_loader)

3.2.1-1 Epoch-Loss Curve¶

We found that the key to the training performance of a model is the learning rate. Epoch number only controls the progress of training.

From the perspective of learning rate (each column), only the learning rate of $1.0\times 10^{-3}$ shows a sign of convergence under each candidate epochs. With this learning rate, the model even overfitted under experiments with an epoch number over $15$. The best model we conclude from this rough selection is the one with the combination of $\text{num\_epoch}=10\land\text{lr}=1.0\times10^{-3}$. The minimum validation loss is $36.648$ at step $7$, which is the lowest of all $16$ samples. However, this doesn't mean that it is optimal since it may jump over a local minimum.

Moreover, as we inspect the performance of the smaller learning rates, we found that they tend to converge only at much later epoch steps. In particular, for the learning rate $1.0\times 10^{-6}$, the learning rate is so low that the model cannot even fit within a practical number of epochs.

InĀ [65]:
# 4x4 grid of epoch-loss curves (rows: epoch budget, columns: learning rate).
plot_el(exp2_1_loaded, ["num_epochs", "lr"], n_rows=4, n_cols=4)
No description has been provided for this image

3.2.1-2 Confusion Matrix¶

In this rough search, the confusion matrix varies on different learning rates, and tends to be identical on different epochs.

Under the same epoch number, as the learning rate gets smaller, the confusion matrix gets "blurrier", meaning that the prediction is less accurate from the whole perspective. Learning rates at or below $1.0\times 10^{-5}$ are too low for the model to converge in a reasonable number of epochs. For the lowest learning rate of $1.0\times 10^{-6}$, the model is not fitted at all. It classifies every number into 1, the class with the largest number of samples in the dataset.

InĀ [71]:
# Test-set confusion matrices for all 16 rough-search combinations.
plot_cm(exp2_1_results, ["num_epochs", "lr"], n_rows=4, n_cols=4)
No description has been provided for this image

3.2.1-3 Precision-Recall Curve¶

From a numerical perspective over the testing performance, the combination of $\text{num\_epoch}=15\land\text{lr}=1.0\times10^{-3}$ gives the highest accuracy of $0.907$, highest average $F_1$ score of $0.916$ and the lowest $F_1$ variance per-class of $0.025$.

InĀ [72]:
# PR curves plus accuracy / per-class F1 summary for all 16 combinations.
exp2_1_accuracies, exp2_1_f1s = plot_pr(exp2_1_results, ["num_epochs", "lr"], n_rows=4, n_cols=4)
print_metrics(exp2_1_accuracies, exp2_1_f1s)
No description has been provided for this image
Accuracies:
0.917 0.825 0.336 0.196 0.912 0.865 0.471 0.196 0.907 0.878 0.663 0.196 0.908 0.880 0.713 0.196 

F1 Score Lists:
0.918 0.942 0.947 0.889 0.925 0.913 0.900 0.931 0.855 0.868 | Avg F1=0.909, Std F1=0.029051096513454423
0.811 0.904 0.880 0.774 0.838 0.791 0.745 0.860 0.703 0.731 | Avg F1=0.804, Std F1=0.06335319369501424
0.040 0.570 0.378 0.189 0.273 0.087 0.002 0.237 0.026 0.004 | Avg F1=0.180, Std F1=0.17824165219527116
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
0.914 0.934 0.945 0.882 0.932 0.896 0.878 0.919 0.863 0.875 | Avg F1=0.904, Std F1=0.02715514894076909
0.866 0.930 0.912 0.808 0.881 0.829 0.809 0.899 0.794 0.769 | Avg F1=0.850, Std F1=0.05240811428774516
0.154 0.743 0.509 0.321 0.550 0.430 0.122 0.550 0.005 0.080 | Avg F1=0.347, Std F1=0.23453887474256918
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
0.916 0.926 0.937 0.875 0.922 0.905 0.879 0.919 0.858 0.863 | Avg F1=0.900, Std F1=0.027185167062975517
0.875 0.932 0.913 0.830 0.890 0.849 0.841 0.904 0.810 0.815 | Avg F1=0.866, Std F1=0.04073441033554439
0.596 0.770 0.749 0.632 0.679 0.614 0.618 0.699 0.294 0.526 | Avg F1=0.618, Std F1=0.1283615011641036
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
0.904 0.943 0.926 0.881 0.914 0.913 0.876 0.921 0.863 0.850 | Avg F1=0.899, Std F1=0.02860358424237646
0.879 0.924 0.925 0.821 0.886 0.867 0.832 0.905 0.820 0.820 | Avg F1=0.868, Std F1=0.040548747485917386
0.689 0.842 0.773 0.646 0.711 0.672 0.640 0.763 0.460 0.549 | Avg F1=0.674, Std F1=0.10523037482252583
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
Best: 1-th
InĀ [73]:
# Per-class ROC curves for all 16 combinations.
plot_rocauc(exp2_1_results, ["num_epochs", "lr"], curve_type="all", n_rows=4, n_cols=4)
No description has been provided for this image
InĀ [74]:
# Macro/micro-averaged ROC curves for all 16 combinations.
plot_rocauc(exp2_1_results, ["num_epochs", "lr"], curve_type="macro_micro", n_rows=4, n_cols=4)
No description has been provided for this image

3.2.2 Experiment 2-2: Detailed¶

Previous sub-experiment tells that the best combination from all the listed ones is $\text{num\_epoch}=15 \land \text{lr}=1.0\times 10^{-3}$.

This is a rough solution, as it may jump over local minima. We want to find a better learning rate around $1.0\times 10^{-3}$, with an even more detailed distinction between candidate values, so that it may reveal a missing local minimum without using too many epochs.

We conducted an excessive experiment, purposely seeking an overfitting point over the listed candidate learning rates. We do this by setting the epoch number to $50$.

InĀ [102]:
# Fixed settings for the fine-grained lr sweep.
# NOTE(review): the key here is "num_epoch" (singular), unlike exp2_1's
# "num_epochs" — run_exp2_2 must read the matching key.
exp2_2_hyperparams = {
    "num_epoch": 15,
    "criterion": nn.CrossEntropyLoss(),
    "transform": A.Compose([
        A.Normalize(mean=exp2_mean, std=exp2_std),
        ToTensorV2()
    ]),
    "optimizer":optim.Adam,
}

# More detailed candidate learning rates around 1e-3, that is 10e-4.
# geomspace gives 8 log-spaced values spanning [1e-4, 5e-3].
exp2_2_candidate_lr = np.geomspace(1e-4, 5e-3, 8)
print(exp2_2_candidate_lr)
[0.0001     0.00017487 0.00030579 0.00053472 0.00093506 0.00163512
 0.0028593  0.005     ]
InĀ [103]:
def run_exp2_2(lr_list, hyper_params, train_loader, test_loader):
    """Train one fresh SmallVGG per candidate learning rate.

    Args:
        lr_list: Candidate learning rates to sweep.
        hyper_params: Dict providing 'num_epoch', 'criterion', and 'optimizer'
            (an optimizer class, instantiated per run).
        train_loader: Training DataLoader.
        test_loader: Loader used as the validation set by train_and_evaluate.

    Returns:
        List of dicts, one per run, each holding the hyperparameters, the
        train/validation loss histories, and the trained model's state dict.
    """
    experiments = []
    for i, lr in enumerate(lr_list):

        print(f"Running Exp {i+1}: lr={lr}")
        this_model = SmallVGG().to(device)
        num_epochs = hyper_params['num_epoch']
        criterion = hyper_params['criterion']
        optimizer = hyper_params['optimizer'](this_model.parameters(), lr=lr)
        train_losses, valid_losses = train_and_evaluate(this_model, train_loader, test_loader, criterion, optimizer, num_epochs)

        experiments.append({
            "num_epochs": num_epochs,
            "lr": lr,
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": this_model.state_dict()
        })

        del this_model, criterion, optimizer
        # Consistency with run_exp3_1/run_exp3_2: release cached GPU memory
        # between runs so successive models don't accumulate allocator cache.
        torch.cuda.empty_cache()
    return experiments
InĀ [Ā ]:
# Run the detailed learning-rate sweep and checkpoint all results.
exp2_2 = run_exp2_2(exp2_2_candidate_lr, exp2_2_hyperparams, exp2_train_loader, exp2_valid_loader)
# Timestamp (with the dot stripped) makes each saved file name unique.
time_str = str(time.time()).replace(".", "")
torch.save(exp2_2, f"./models/exp2-2_{time_str}.pth")
InĀ [Ā ]:
# Reload a previously saved sweep. map_location keeps the load working on
# CPU-only machines even though the tensors were saved from CUDA.
exp2_2_loaded = torch.load("./models/exp2-2_17305693866477516.pth", map_location=device)
exp2_2_results = get_experiment_results(exp2_2_loaded, test_hyperparam_names=["num_epochs", "lr"], extra_loader=exp2_test_loader)

3.2.2-1 Epoch-Loss Curve¶

Due to time constraints, an epoch budget of $15$ is our tolerance line. By inspecting the epoch-loss curves, we found that all the experiments are close to overfitting near the end of training.

From all the detailed searches, the learning rate of 9.35e-4 yields the lowest validation loss of $38.227$ at step $7$, which is the overfitting point. This minimum is worse than the one produced by the learning rate 1e-3, which is $36.549$. Therefore, as a trade-off between time and performance, we choose the combination of epoch=$15$ and lr=$0.001$ for the following experiments.

InĀ [108]:
plot_el(exp2_2_loaded, ["num_epochs", "lr"], n_rows=2, n_cols=4)
No description has been provided for this image

3.2.2-2 Confusion Matrix¶

At a glance, from the perspective of confusion matrix, the testing performance on unknown data is roughly identical.

InĀ [113]:
plot_cm(exp2_2_results, ["num_epochs", "lr"], n_rows=2, n_cols=4)
No description has been provided for this image

3.2.2-3 Precision-Recall Curve¶

By inspecting the evaluation metrics, we found our judgement correct. Of all the over-fitted models, the model with a learning rate of $9.35\times 10^{-4}$ yields the highest accuracy of $0.912$ and the highest average per-class $F_1$ score of $0.905$. Besides, the per-class $F_1$ score is also less variant under this learning rate, with a standard deviation of $0.029$.

InĀ [114]:
exp2_2_accuracies, exp2_2_f1s = plot_pr(exp2_2_results, ["num_epochs", "lr"], n_rows=2, n_cols=4)
print_metrics(exp2_2_accuracies, exp2_2_f1s)
No description has been provided for this image
Accuracies:
0.873 0.885 0.905 0.906 0.912 0.903 0.893 0.882 

F1 Score Lists:
0.873 0.929 0.912 0.822 0.888 0.851 0.829 0.901 0.795 0.798 | Avg F1=0.860, Std F1=0.04542290722465954
0.892 0.936 0.916 0.850 0.905 0.854 0.843 0.906 0.819 0.808 | Avg F1=0.873, Std F1=0.04146430928239567
0.908 0.942 0.941 0.874 0.921 0.888 0.857 0.924 0.840 0.839 | Avg F1=0.893, Std F1=0.03748405274950539
0.911 0.942 0.941 0.868 0.923 0.889 0.875 0.917 0.868 0.829 | Avg F1=0.896, Std F1=0.03485676831682514
0.913 0.936 0.943 0.887 0.928 0.912 0.875 0.926 0.852 0.875 | Avg F1=0.905, Std F1=0.029020749926052154
0.901 0.932 0.937 0.880 0.908 0.912 0.873 0.918 0.835 0.841 | Avg F1=0.894, Std F1=0.033859473636702976
0.887 0.926 0.934 0.862 0.900 0.882 0.854 0.915 0.829 0.836 | Avg F1=0.883, Std F1=0.035070141262265965
0.873 0.921 0.919 0.859 0.888 0.880 0.832 0.891 0.814 0.827 | Avg F1=0.870, Std F1=0.03530344586873121
Best: 5-th

3.2.2-4 ROC-AUC Curve¶

The ROC-AUC Curve under all the detailed candidate learning rates are roughly identical.

InĀ [116]:
plot_rocauc(exp2_2_results, ["num_epochs", "lr"], curve_type="all", n_rows=2, n_cols=4)
No description has been provided for this image
InĀ [117]:
plot_rocauc(exp2_2_results, ["num_epochs", "lr"], curve_type="macro_micro", n_rows=2, n_cols=4)
No description has been provided for this image

3.3 Experiment 3: Image Augmentation Parameters¶

3.3.1 Experiment 3-1: Rotation Angles and Crop Ratios¶

InĀ [169]:
# Build the full training set for experiment 3, then split off a validation set.
exp3_universal_train_dataset = SVHNDataset(mat_file=os.path.join(path_dataset, "train_32x32.mat"))

exp3_train_dataset, exp3_valid_dataset = split_train_valid(exp3_universal_train_dataset, train_ratio=0.8)
del exp3_universal_train_dataset

# The mean & std here will only be used for experiment 3-1.
exp3_1_mean, exp3_1_std = exp3_train_dataset.get_meanstd()

exp3_1_hyperparams = {
    "num_epochs": 50,
    "lr": 1e-3,
    "criterion": nn.CrossEntropyLoss(),
    "optimizer": optim.Adam,
    # Fix: normalize with experiment 3's own channel statistics. The original
    # used exp1_mean/exp1_std here, which contradicts the note above and
    # mismatches the train-time normalization applied inside run_exp3_1.
    "transform": A.Compose([
        A.Normalize(mean=exp3_1_mean, std=exp3_1_std),
        ToTensorV2()
    ])
}

# Transform of the train dataset will be altered per run in the experiments;
# validation and test sets keep this plain normalize-only pipeline.
exp3_valid_dataset.transform = exp3_1_hyperparams['transform']
exp3_test_dataset = SVHNDataset(mat_file=os.path.join(path_dataset, "test_32x32.mat"), transform=exp3_1_hyperparams["transform"])

print(f"Training Size:{len(exp3_train_dataset)}, Validation Size:{len(exp3_valid_dataset)}")
print(f"Channel Means: {exp3_1_mean}\nChannel Stds: {exp3_1_std}")
Training Size:58605, Validation Size:14652
Channel Means: [0.43759557604789734, 0.4438314437866211, 0.47290241718292236]
Channel Stds: [0.19781899452209473, 0.2008139044046402, 0.19679588079452515]
InĀ [170]:
# Group 1
candidate_angles = [15, 30, 45, 60]
candidate_crops = [0.08, 0.24, 0.40, 0.60] # Left Boundary
InĀ [171]:
# Evaluation loaders are never shuffled so per-sample metrics are comparable
# across runs.
exp3_valid_loader = DataLoader(exp3_valid_dataset, batch_size=128, shuffle=False)
exp3_test_loader = DataLoader(exp3_test_dataset, batch_size=128, shuffle=False)
InĀ [172]:
def run_exp3_1(angles, crops, hyper_params, train_dataset, valid_loader):
    """Grid-search over rotation limits and crop scale lower bounds.

    A fresh SmallVGG is trained for each (angle, crop) combination with a
    train-time augmentation pipeline built from that combination.

    Args:
        angles: Candidate rotation limits (degrees) for A.Rotate.
        crops: Candidate lower bounds for A.RandomResizedCrop's scale range.
        hyper_params: Dict with 'num_epochs', 'lr', 'criterion', 'optimizer'.
        train_dataset: Training dataset; its .transform is swapped per run.
        valid_loader: Fixed validation loader (transform set by the caller).

    Returns:
        List of dicts, one per combination, with the hyperparameters, loss
        histories, and trained model state dict.
    """
    combinations = list(itertools.product(angles, crops))
    experiments = []

    # The DataLoader fetches lazily from train_dataset, so it only needs to be
    # built once; assigning train_dataset.transform below still takes effect
    # for every subsequent batch.
    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

    for i, combo in enumerate(combinations):
        angle, crop = combo

        print(f"Running Exp {i+1}: angles={angle}, crop={crop}")
        this_model = SmallVGG().to(device)
        num_epochs = hyper_params['num_epochs']
        lr = hyper_params['lr']
        criterion = hyper_params['criterion']
        optimizer = hyper_params['optimizer'](this_model.parameters(), lr=lr)

        # Per-run augmentation: random crop-resize, rotation, then normalize
        # with experiment 3-1's channel statistics.
        this_transform = A.Compose([
            A.RandomResizedCrop(32, 32, scale=(crop, 1.0)),
            A.Rotate(limit=angle),
            A.Normalize(mean=exp3_1_mean, std=exp3_1_std),
            ToTensorV2()
        ])

        print(f"Exp {i+1}: Generating dataset from transform")
        train_dataset.transform = this_transform

        # Train with early stopping so overfitted runs stop wasting epochs.
        train_losses, valid_losses = train_and_evaluate(this_model, 
                                                       train_loader, 
                                                       valid_loader, 
                                                       criterion, 
                                                       optimizer,
                                                       num_epochs,
                                                       stop_early_params={
                                                           "min_delta": 0.01,
                                                           "patience": 5
                                                       })

        experiments.append({
            "angle": angle,
            "crop": crop,
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": this_model.state_dict()
        })

        del this_model, criterion, optimizer

        # Release cached GPU memory before the next run.
        torch.cuda.empty_cache()

    return experiments
InĀ [Ā ]:
# Run the angle/crop grid search and checkpoint all results.
exp3_1 = run_exp3_1(candidate_angles, candidate_crops, exp3_1_hyperparams, exp3_train_dataset, exp3_valid_loader)
# Timestamp (with the dot stripped) makes each saved file name unique.
time_str = str(time.time()).replace(".","")
torch.save(exp3_1, f"./models/exp3-1_{time_str}.pth")
InĀ [Ā ]:
# Reload the saved grid search. map_location keeps the load working on
# CPU-only machines even though the tensors were saved from CUDA.
exp3_1_loaded = torch.load("./models/exp3-1_17306646089970913.pth", map_location=device)
exp3_1_results = get_experiment_results(exp3_1_loaded, test_hyperparam_names=["angle", "crop"], extra_loader=exp3_test_loader)
InĀ [175]:
plot_el(exp3_1_loaded, ["angle", "crop"], n_rows=4, n_cols=4)
No description has been provided for this image
InĀ [176]:
plot_cm(exp3_1_results, ["angle", "crop"], n_rows=4, n_cols=4)
No description has been provided for this image
InĀ [177]:
exp3_accuracies, exp3_f1s = plot_pr(exp3_1_results, ["angle", "crop"], n_rows=4, n_cols=4)
print_metrics(exp3_accuracies, exp3_f1s)
No description has been provided for this image
Accuracies:
0.912 0.916 0.917 0.931 0.892 0.898 0.917 0.928 0.886 0.893 0.914 0.928 0.891 0.888 0.910 0.926 

F1 Score Lists:
0.889 0.942 0.942 0.885 0.939 0.922 0.887 0.924 0.843 0.848 | Avg F1=0.902, Std F1=0.03538451976963852
0.895 0.940 0.939 0.894 0.939 0.923 0.894 0.925 0.862 0.864 | Avg F1=0.908, Std F1=0.02847237430596348
0.899 0.936 0.945 0.893 0.935 0.927 0.900 0.923 0.876 0.858 | Avg F1=0.909, Std F1=0.02722603637075925
0.915 0.949 0.952 0.906 0.948 0.937 0.925 0.930 0.892 0.890 | Avg F1=0.925, Std F1=0.02181626856531463
0.892 0.927 0.924 0.845 0.925 0.890 0.870 0.875 0.845 0.837 | Avg F1=0.883, Std F1=0.03265762724812469
0.893 0.934 0.930 0.874 0.925 0.905 0.846 0.901 0.808 0.847 | Avg F1=0.886, Std F1=0.03969946400782543
0.904 0.943 0.947 0.889 0.934 0.921 0.896 0.911 0.873 0.878 | Avg F1=0.910, Std F1=0.024999702435631828
0.918 0.946 0.952 0.903 0.946 0.927 0.908 0.926 0.892 0.900 | Avg F1=0.922, Std F1=0.020141128246637333
0.837 0.922 0.941 0.842 0.913 0.900 0.865 0.915 0.797 0.797 | Avg F1=0.873, Std F1=0.04999486924122103
0.892 0.926 0.933 0.848 0.918 0.893 0.862 0.899 0.829 0.842 | Avg F1=0.884, Std F1=0.035008693882825154
0.901 0.942 0.939 0.871 0.935 0.914 0.901 0.913 0.874 0.877 | Avg F1=0.907, Std F1=0.025555951863289215
0.921 0.944 0.954 0.900 0.950 0.930 0.921 0.922 0.890 0.882 | Avg F1=0.922, Std F1=0.02344422620633198
0.874 0.929 0.931 0.858 0.887 0.893 0.855 0.923 0.823 0.834 | Avg F1=0.881, Std F1=0.036647593322034946
0.857 0.926 0.923 0.847 0.916 0.904 0.882 0.905 0.809 0.796 | Avg F1=0.876, Std F1=0.0446762695421183
0.904 0.940 0.940 0.882 0.923 0.905 0.887 0.914 0.866 0.855 | Avg F1=0.902, Std F1=0.027712221334215276
0.904 0.949 0.955 0.892 0.948 0.912 0.916 0.922 0.893 0.894 | Avg F1=0.919, Std F1=0.023072110295838992
Best: 4-th
InĀ [178]:
plot_rocauc(exp3_1_results, ["angle", "crop"], curve_type="all", n_rows=4, n_cols=4)
No description has been provided for this image
InĀ [179]:
plot_rocauc(exp3_1_results, ["angle", "crop"], curve_type="macro_micro", n_rows=4, n_cols=4)
No description has been provided for this image

3.3.2 Experiment 3-2: Ratios & Bias¶

InĀ [188]:
exp3_2_hyperparams = {
    "num_epochs": 50,
    "lr": 1e-3,
    "criterion": nn.CrossEntropyLoss(),
    "optimizer": optim.AdamW,
    "crop":0.6,
    "angle":15,
    "transform": A.Compose([
        A.Normalize(mean=exp1_mean, std=exp1_std),
        ToTensorV2()
    ])
}

class AddBiasTransform:
    """Add a random per-image intensity bias, wrapping values modulo 256.

    On every call a bias is drawn uniformly from the closed range
    [bias1, bias2]. A single int ``b`` means the range [0, b]; a
    ``(lo, hi)`` tuple is used directly.
    """

    def __init__(self, bias: Union[int, Tuple[int, int]]) -> None:
        if isinstance(bias, tuple):
            self.bias1, self.bias2 = bias
        else:
            self.bias1, self.bias2 = 0, bias

    def __call__(self, img: np.ndarray) -> np.ndarray:
        original_dtype = img.dtype
        offset = random.randint(self.bias1, self.bias2)
        # Widen to int16 before adding so the sum cannot overflow uint8,
        # then wrap back into [0, 255] and restore the original dtype.
        shifted = (img.astype(np.int16) + offset) % 256
        return shifted.astype(original_dtype)
        
# Group 2: aspect-ratio lower bounds r (the upper bound is 1/r) and channel
# bias magnitudes for AddBiasTransform.
candidate_ratios = [0.25, 0.42, 0.58, 0.75]
candidate_channel_biases = [0, 32, 64, 128]

Candidate values for the two augmentation variables under test: crop aspect ratio and channel bias.

InĀ [189]:
def run_exp3_2(ratios, biases, hyper_params, train_dataset, valid_dataset):
    """Grid-search over crop aspect-ratio bounds and channel bias magnitudes.

    A fresh SmallVGG is trained per (ratio, bias) combination. Normalization
    statistics are recomputed each run via train_dataset.get_meanstd(bias=bias)
    so they match the bias-shifted images.

    Args:
        ratios: Candidate lower bounds r of the crop aspect-ratio range (r, 1/r).
        biases: Candidate channel bias magnitudes for AddBiasTransform.
        hyper_params: Dict with 'num_epochs', 'lr', 'criterion', 'optimizer',
            and the fixed 'crop' and 'angle' augmentation settings.
        train_dataset: Training dataset; its .transform is swapped per run.
        valid_dataset: Validation dataset; its .transform is swapped per run.

    Returns:
        List of dicts, one per combination, with the hyperparameters, loss
        histories, and trained model state dict.
    """
    combinations = list(itertools.product(ratios, biases))
    experiments = []

    # DataLoaders fetch lazily from the datasets, so they only need to be
    # built once; assigning the datasets' .transform below still takes effect
    # for every subsequent batch.
    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=False)

    for i, combo in enumerate(combinations):
        ratio, bias = combo

        print(f"Running Exp {i+1}: ratio={ratio}, bias={bias}")
        this_model = SmallVGG().to(device)
        num_epochs = hyper_params['num_epochs']
        lr = hyper_params['lr']
        criterion = hyper_params['criterion']
        optimizer = hyper_params['optimizer'](this_model.parameters(), lr=lr)

        # Construct the bias transform once per run (instead of inside the
        # lambda) so it is not re-instantiated for every image.
        add_bias = AddBiasTransform(bias)
        this_mean, this_std = train_dataset.get_meanstd(bias=bias)
        this_train_transform = A.Compose([
            A.Lambda(image=lambda img, **kwargs: add_bias(img)),  # Lambda customized transform block
            A.RandomResizedCrop(32, 32, scale=(hyper_params['crop'], 1.0), ratio=(ratio, 1.0 / ratio)),
            A.Rotate(limit=hyper_params['angle']),
            A.Normalize(mean=this_mean, std=this_std),
            ToTensorV2()
        ])

        # Validation only normalizes — no geometric or bias augmentation.
        this_valid_transform = A.Compose([
            A.Normalize(mean=this_mean, std=this_std),
            ToTensorV2()
        ])

        print(f"Exp {i+1}: Generating dataset from transform")
        train_dataset.transform = this_train_transform
        valid_dataset.transform = this_valid_transform

        # Train with early stopping so overfitted runs stop wasting epochs.
        train_losses, valid_losses = train_and_evaluate(this_model, 
                                                       train_loader, 
                                                       valid_loader, 
                                                       criterion, 
                                                       optimizer,
                                                       num_epochs,
                                                       stop_early_params={
                                                           "min_delta": 0.01,
                                                           "patience": 5
                                                       })

        experiments.append({
            "ratio": ratio,
            "bias": bias,
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": this_model.state_dict()
        })

        del this_model, criterion, optimizer

        # Release cached GPU memory before the next run.
        torch.cuda.empty_cache()

    return experiments
InĀ [Ā ]:
# Run the ratio/bias grid search and checkpoint all results.
exp3_2 = run_exp3_2(candidate_ratios, candidate_channel_biases, exp3_2_hyperparams, exp3_train_dataset, exp3_valid_dataset)
# Timestamp (with the dot stripped) makes each saved file name unique.
time_str = str(time.time()).replace(".", "")
torch.save(exp3_2, f"./models/exp3-2_{time_str}.pth")
InĀ [Ā ]:
# Reload the saved grid search. map_location keeps the load working on
# CPU-only machines even though the tensors were saved from CUDA.
exp3_2_loaded = torch.load("./models/exp3-2_17307070782426171.pth", map_location=device)
exp3_2_results = get_experiment_results(exp3_2_loaded, test_hyperparam_names=["ratio", "bias"], extra_loader=exp3_test_loader)
InĀ [192]:
plot_el(exp3_2_loaded, ["ratio", "bias"], n_rows=4, n_cols=4)
No description has been provided for this image
InĀ [193]:
plot_cm(exp3_2_results, ["ratio", "bias"], n_rows=4, n_cols=4)
No description has been provided for this image
InĀ [194]:
exp3_2_accuracies, exp3_2_f1s = plot_pr(exp3_2_results, ["ratio", "bias"], n_rows=4, n_cols=4)
print_metrics(exp3_2_accuracies, exp3_2_f1s)
No description has been provided for this image
Accuracies:
0.924 0.912 0.877 0.885 0.925 0.911 0.913 0.830 0.924 0.913 0.908 0.808 0.928 0.898 0.861 0.852 

F1 Score Lists:
0.916 0.949 0.949 0.902 0.943 0.928 0.892 0.928 0.861 0.883 | Avg F1=0.915, Std F1=0.028178712032797315
0.910 0.938 0.938 0.866 0.932 0.924 0.898 0.914 0.856 0.873 | Avg F1=0.905, Std F1=0.02901513509491587
0.859 0.902 0.923 0.845 0.863 0.896 0.843 0.888 0.809 0.838 | Avg F1=0.867, Std F1=0.03323173297079609
0.864 0.914 0.926 0.856 0.888 0.888 0.854 0.890 0.825 0.844 | Avg F1=0.875, Std F1=0.030212915176477208
0.918 0.941 0.949 0.909 0.945 0.931 0.902 0.921 0.875 0.891 | Avg F1=0.918, Std F1=0.02315118767203162
0.890 0.933 0.936 0.887 0.931 0.924 0.899 0.900 0.854 0.875 | Avg F1=0.903, Std F1=0.02627288161038198
0.905 0.935 0.940 0.881 0.934 0.919 0.890 0.919 0.870 0.873 | Avg F1=0.907, Std F1=0.02514265979976644
0.774 0.862 0.885 0.806 0.838 0.865 0.796 0.820 0.738 0.785 | Avg F1=0.817, Std F1=0.04367929965531259
0.930 0.944 0.947 0.904 0.933 0.922 0.910 0.918 0.884 0.883 | Avg F1=0.917, Std F1=0.021337897218826583
0.883 0.938 0.937 0.884 0.947 0.920 0.898 0.916 0.868 0.855 | Avg F1=0.904, Std F1=0.03000580711454836
0.896 0.932 0.937 0.874 0.930 0.916 0.893 0.906 0.853 0.868 | Avg F1=0.901, Std F1=0.027517225325711638
0.787 0.821 0.879 0.762 0.819 0.814 0.790 0.825 0.720 0.747 | Avg F1=0.796, Std F1=0.04325151448553455
0.929 0.951 0.947 0.908 0.945 0.934 0.907 0.923 0.880 0.889 | Avg F1=0.922, Std F1=0.023251275786566786
0.895 0.931 0.921 0.872 0.907 0.901 0.865 0.901 0.839 0.861 | Avg F1=0.889, Std F1=0.027568983031421735
0.835 0.885 0.919 0.818 0.873 0.882 0.836 0.861 0.766 0.801 | Avg F1=0.848, Std F1=0.04310050816257488
0.809 0.875 0.914 0.806 0.859 0.880 0.842 0.825 0.780 0.813 | Avg F1=0.841, Std F1=0.03902650406749625
Best: 13-th
InĀ [195]:
plot_rocauc(exp3_2_results, ["ratio", "bias"], curve_type="all", n_rows=4, n_cols=4)
No description has been provided for this image
InĀ [196]:
plot_rocauc(exp3_2_results, ["ratio", "bias"], curve_type="macro_micro", n_rows=4, n_cols=4)
No description has been provided for this image
InĀ [219]:
def exp3_2_peek(dataset, hyper_params, index=21642, ratio=1.0, bias=0):
    """Visualize one augmented training sample under the given ratio/bias.

    Deep-copies `dataset`, installs the experiment-3-2 augmentation pipeline
    on the copy (bias shift, crop, rotation, normalization), and displays the
    sample at `index` via peek().

    Args:
        dataset: SVHN dataset; must support .overwrite() and .get_meanstd().
        hyper_params: Dict providing the fixed 'crop' and 'angle' settings.
        index: Index of the sample to display.
        ratio: Aspect-ratio lower bound; the upper bound is its reciprocal.
        bias: Channel bias magnitude for AddBiasTransform (wraps modulo 256).
    """
    # Fix: size the copy from the passed-in dataset. The original read the
    # global exp3_train_dataset's length, silently ignoring the `dataset`
    # argument's actual size.
    sample_dataset = dataset.overwrite(range(len(dataset)))  # Deep copy of original
    temp_mean, temp_std = sample_dataset.get_meanstd(bias=bias)
    sample_dataset.transform = A.Compose([
        A.Lambda(image=lambda img, **kwargs: AddBiasTransform(bias)(img)),  # Lambda customized transform block
        A.RandomResizedCrop(32, 32, scale=(hyper_params["crop"], 1.0), ratio=(ratio, 1.0 / ratio)),
        A.Rotate(limit=hyper_params["angle"]),
        A.Normalize(mean=temp_mean, std=temp_std),
        ToTensorV2()
    ])
    peek(sample_dataset, index=index)
    del sample_dataset, temp_mean, temp_std
InĀ [233]:
exp3_2_peek(exp3_train_dataset, exp3_2_hyperparams, index=57468, ratio=0.75, bias=0)
D:\Temps\temp\ipykernel_22900\259240058.py:5: UserWarning: Using lambda is incompatible with multiprocessing. Consider using regular functions or partial().
  A.Lambda(image=lambda img, **kwargs: AddBiasTransform(bias)(img)),  # Lambda customized transform block
No description has been provided for this image
Peeking data from training set of index 57468.
Image Tnesor Size:torch.Size([3, 32, 32])
InĀ [232]:
exp3_2_peek(exp3_train_dataset, exp3_2_hyperparams, index=57468, ratio=0.75, bias=32)
D:\Temps\temp\ipykernel_22900\259240058.py:5: UserWarning: Using lambda is incompatible with multiprocessing. Consider using regular functions or partial().
  A.Lambda(image=lambda img, **kwargs: AddBiasTransform(bias)(img)),  # Lambda customized transform block
No description has been provided for this image
Peeking data from training set of index 57468.
Image Tnesor Size:torch.Size([3, 32, 32])
InĀ [231]:
exp3_2_peek(exp3_train_dataset, exp3_2_hyperparams, index=57468, ratio=0.75, bias=64)
D:\Temps\temp\ipykernel_22900\259240058.py:5: UserWarning: Using lambda is incompatible with multiprocessing. Consider using regular functions or partial().
  A.Lambda(image=lambda img, **kwargs: AddBiasTransform(bias)(img)),  # Lambda customized transform block
No description has been provided for this image
Peeking data from training set of index 57468.
Image Tnesor Size:torch.Size([3, 32, 32])
InĀ [227]:
exp3_2_peek(exp3_train_dataset, exp3_2_hyperparams, index=57468, ratio=0.75, bias=128)
D:\Temps\temp\ipykernel_22900\259240058.py:5: UserWarning: Using lambda is incompatible with multiprocessing. Consider using regular functions or partial().
  A.Lambda(image=lambda img, **kwargs: AddBiasTransform(bias)(img)),  # Lambda customized transform block
No description has been provided for this image
Peeking data from training set of index 57468.
Image Tnesor Size:torch.Size([3, 32, 32])